In [1]:
#Please use the below command to install plotly. It has been used in this project and will be required for the plots to be generated. 
#pip install plotly==4.9.0
In [2]:
#Data files for this project was extracted from the government of India website for statistical information.
#Here we will be exploring the accidents.
import xlrd
import json
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import plotly.offline as po
import plotly.graph_objs as go
import plotly.express as px
In [3]:
def add_year_column(year,nofrows):
    year_col = []

    for a in nofrows:
        year_col.append(year)

    year_df = pd.Series(year_col)
    return year_df
In [4]:
# Read Excel file for Natural and Unatural Causes with multiple sheets with Statewise statistics, Citywise statistics and Overall 
# statistics for the years

loc = ('Natural and Unnatural.xlsx')

# To open Workbook 
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0) 
sheet_list1 = wb.sheet_names()
a = 0
for i in range(20):
    if(sheet.cell_value(i,0)) == 1.0:
        a = i
        break
df_input = pd.read_excel('Natural and Unnatural.xlsx', sheet_by_index=0, skiprows=a, nrows=37)

b=0
sheet_temp = wb.sheet_by_index(1)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'Year':
        b = i
        break
        
df_accident_naun = pd.read_excel('Natural and Unnatural.xlsx',sheet_name=sheet_list1[1], skiprows=b, nrows=16)
df_accident_naun=df_accident_naun.set_index('Year')

C=0
sheet_temp = wb.sheet_by_index(2)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'City':
        c = i
        break
df_accident_naun_city = pd.read_excel('Natural and Unnatural.xlsx',sheet_name=sheet_list1[2], skiprows=c, nrows=55)
In [5]:
# Dataframe to be generated for the statewise statistics for natural and unnatural causes
# The excel columns are parsed and renamed with identical coulumns to apporach the dataframe in an organized fashion

nat_column_lbl = ['Avalanche', 'Lightning','HeatStroke', 'Flood', 'ColdWeather', 'Cyclone',
             'Starvation', 'Earthquake', 'Epidemic','TorrentialRains', 'OtherNatural']
unnat_column_lbl = ['AirCrash','StructureCollapse','Drowning','Electrocution',
             'Explosion','Falls','FMAccidents','Fire','Firearms','SuddenDeath',
             'KilledbyAnimal','MQDisaster','Poisioning','Stampede','Suffocation',
             'Trafficaccident','OtherCauses','Unknowncauses']

states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
'NL','OR','PB','RJ','SK','TN','TS','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']


df_complete = pd.DataFrame()
df_temp = pd.DataFrame()

years = [2008,2009,2010,2011,2012,2013,2014,2015]
cols0 = [2,3,11,19,27,35,43,51,59,67,75,83,91,99,107,115,123,131,139,147,155,163,171,179,
                            187,195,203,211,219,227,235,243]
cols1 = [2,4,12,20,28,36,44,52,60,68,76,84,92,100,108,116,124,132,140,148,156,164,172,180,
                            188,196,204,212,220,228,236,244]
cols2 = [2,5,13,21,29,37,45,53,61,69,77,85,93,101,109,117,125,133,141,149,157,165,173,181,
                            189,197,205,213,221,229,237,245]
cols3 = [2,6,14,22,30,38,46,54,62,70,78,86,94,102,110,118,126,134,142,150,158,166,174,182,
                            190,198,206,214,222,230,238,246]
cols4 = [2,7,15,23,31,39,47,55,63,71,79,87,95,103,111,119,127,135,143,151,159,167,175,183,
                            191,199,207,215,223,231,239,247]
cols5 = [2,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120,128,136,144,152,160,168,176,184,
                            192,200,208,216,224,232,240,248]
cols6 = [2,9,17,25,33,41,49,57,65,73,81,89,97,105,113,121,129,137,145,153,161,169,177,185,
                            193,201,209,217,225,233,241,249]
cols7 = [2,10,18,26,34,42,50,58,66,74,82,90,98,106,114,122,130,138,146,154,162,170,178,186,
                            194,202,210,218,226,234,242,250]
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6, cols7]

for i, j in zip(years,datacols):
    df_temp = df_input[j]
    year_df = add_year_column(i,np.arange(len(df_temp)))
    df_temp.insert(0,'Year',year_df)
    if i == 2008:  
        df_temp = df_temp.rename(columns={2:'State', 3:'Avalanche', 11:'Lightning',
             19:'HeatStroke', 27:'Flood', 35:'ColdWeather',  43:'Cyclone',
             51:'Starvation', 59:'Earthquake', 67:'Epidemic', 75:'TorrentialRains', 
             83:'Other',91:'NTotal',99:'AirCrash',107:'StructureCollapse',115:'Drowning',123:'Electrocution',
             131:'Explosion',139:'Falls',147:'FMAccidents',155:'Fire',163:'Firearms',171:'SuddenDeath',
             179:'KilledbyAnimal',187:'MQDisaster',195:'Poisioning',203:'Stampede',211:'Suffocation',
             219:'Trafficaccident',227:'OtherCauses',235:'Unknowncauses',243:'UnTotal'}) 
    elif i == 2009:
        df_temp = df_temp.rename(columns={2:'State', 4:'Avalanche', 12:'Lightning',
             20:'HeatStroke', 28:'Flood', 36:'ColdWeather',  44:'Cyclone',
             52:'Starvation', 60:'Earthquake', 68:'Epidemic', 76:'TorrentialRains', 
             84:'Other',92:'NTotal',100:'AirCrash',108:'StructureCollapse',116:'Drowning',124:'Electrocution',
             132:'Explosion',140:'Falls',148:'FMAccidents',156:'Fire',164:'Firearms',172:'SuddenDeath',
             180:'KilledbyAnimal',188:'MQDisaster',196:'Poisioning',204:'Stampede',212:'Suffocation',
             220:'Trafficaccident',228:'OtherCauses',236:'Unknowncauses',244:'UnTotal'}) 
    elif i == 2010:
        df_temp = df_temp.rename(columns={2:'State', 5:'Avalanche', 13:'Lightning',
             21:'HeatStroke', 29:'Flood', 37:'ColdWeather',  45:'Cyclone',
             53:'Starvation', 61:'Earthquake', 69:'Epidemic', 77:'TorrentialRains', 
             85:'Other',93:'NTotal',101:'AirCrash',109:'StructureCollapse',117:'Drowning',125:'Electrocution',
             133:'Explosion',141:'Falls',149:'FMAccidents',157:'Fire',165:'Firearms',173:'SuddenDeath',
             181:'KilledbyAnimal',189:'MQDisaster',197:'Poisioning',205:'Stampede',213:'Suffocation',
             221:'Trafficaccident',229:'OtherCauses',237:'Unknowncauses',245:'UnTotal'}) 
    elif i == 2011:
        df_temp = df_temp.rename(columns={2:'State', 6:'Avalanche', 14:'Lightning',
             22:'HeatStroke', 30:'Flood', 38:'ColdWeather',  46:'Cyclone',
             54:'Starvation', 62:'Earthquake', 70:'Epidemic', 78:'TorrentialRains', 
             86:'Other',94:'NTotal',102:'AirCrash',110:'StructureCollapse',118:'Drowning',126:'Electrocution',
             134:'Explosion',142:'Falls',150:'FMAccidents',158:'Fire',166:'Firearms',174:'SuddenDeath',
             182:'KilledbyAnimal',190:'MQDisaster',198:'Poisioning',206:'Stampede',214:'Suffocation',
             222:'Trafficaccident',230:'OtherCauses',238:'Unknowncauses',246:'UnTotal'}) 
    elif i == 2012:
        df_temp = df_temp.rename(columns={2:'State', 7:'Avalanche', 15:'Lightning',
             23:'HeatStroke', 31:'Flood', 39:'ColdWeather',  47:'Cyclone',
             55:'Starvation', 63:'Earthquake', 71:'Epidemic', 79:'TorrentialRains', 
             87:'Other',95:'NTotal',103:'AirCrash',111:'StructureCollapse',119:'Drowning',127:'Electrocution',
             135:'Explosion',143:'Falls',151:'FMAccidents',159:'Fire',167:'Firearms',175:'SuddenDeath',
             183:'KilledbyAnimal',191:'MQDisaster',199:'Poisioning',207:'Stampede',215:'Suffocation',
             223:'Trafficaccident',231:'OtherCauses',239:'Unknowncauses',247:'UnTotal'}) 
    elif i == 2013:
        df_temp = df_temp.rename(columns={2:'State', 8:'Avalanche', 16:'Lightning',
             24:'HeatStroke', 32:'Flood', 40:'ColdWeather',  48:'Cyclone',
             56:'Starvation', 64:'Earthquake', 72:'Epidemic', 80:'TorrentialRains', 
             88:'Other',96:'NTotal',104:'AirCrash',112:'StructureCollapse',120:'Drowning',128:'Electrocution',
             136:'Explosion',144:'Falls',152:'FMAccidents',160:'Fire',168:'Firearms',176:'SuddenDeath',
             184:'KilledbyAnimal',192:'MQDisaster',200:'Poisioning',208:'Stampede',216:'Suffocation',
             224:'Trafficaccident',232:'OtherCauses',240:'Unknowncauses',248:'UnTotal'}) 
    elif i == 2014:
        df_temp = df_temp.rename(columns={2:'State', 9:'Avalanche', 17:'Lightning',
             25:'HeatStroke', 33:'Flood', 41:'ColdWeather',  49:'Cyclone',
             57:'Starvation', 65:'Earthquake', 73:'Epidemic', 81:'TorrentialRains', 
             89:'Other',97:'NTotal',105:'AirCrash',113:'StructureCollapse',121:'Drowning',129:'Electrocution',
             137:'Explosion',145:'Falls',153:'FMAccidents',161:'Fire',169:'Firearms',177:'SuddenDeath',
             185:'KilledbyAnimal',193:'MQDisaster',201:'Poisioning',209:'Stampede',217:'Suffocation',
             225:'Trafficaccident',233:'OtherCauses',241:'Unknowncauses',249:'UnTotal'}) 
    elif i == 2015:
        df_temp = df_temp.rename(columns={2:'State', 10:'Avalanche', 18:'Lightning',
             26:'HeatStroke', 34:'Flood', 42:'ColdWeather',  50:'Cyclone',
             58:'Starvation', 66:'Earthquake', 74:'Epidemic', 82:'TorrentialRains', 
             90:'Other',98:'NTotal',106:'AirCrash',114:'StructureCollapse',122:'Drowning',130:'Electrocution',
             138:'Explosion',146:'Falls',154:'FMAccidents',162:'Fire',170:'Firearms',178:'SuddenDeath',
             186:'KilledbyAnimal',194:'MQDisaster',202:'Poisioning',210:'Stampede',218:'Suffocation',
             226:'Trafficaccident',234:'OtherCauses',242:'Unknowncauses',250:'UnTotal'}) 
    df_complete = df_complete.append(df_temp)
df_complete.sort_values(by=['Year','State'], inplace=True)
df_state_naun = df_complete[df_complete['Year'] > 2008]
In [6]:
# Dataframe to be generated for the citywise statistics for natural and unnatural causes
# The excel columns are parsed and renamed with identical coulumns to apporach the dataframe in an organized fashion
df_city_acci_naun = pd.DataFrame()
df_temp2 = pd.DataFrame()
arr2 = ['Year','City','State','Avalanche', 'Lightning','HeatStroke', 'Flood', 'ColdWeather', 'Cyclone',
             'Starvation', 'Earthquake', 'Epidemic','TorrentialRains', 'Other', 'NTotal',
            'AirCrash','StructureCollapse','Drowning','Electrocution',
             'Explosion','Falls','FMAccidents','Fire','Firearms','SuddenDeath',
             'KilledbyAnimal','MQDisaster','Poisioning','Stampede','Suffocation',
             'Trafficaccident','OtherCauses','Unknowncauses','UnTotal']

years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8',
        '2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14', '2009.15', '2009.16', '2009.17',
         '2009.18', '2009.19', '2009.20', '2009.21', '2009.22', '2009.23','2009.24', '2009.25', '2009.26',
         '2009.27', '2009.28', '2009.29', '2009.30']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8',
        '2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14', '2010.15', '2010.16', '2010.17',
         '2010.18', '2010.19', '2010.20', '2010.21', '2010.22', '2010.23','2010.24', '2010.25', '2010.26',
         '2010.27', '2010.28', '2010.29', '2010.30']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8',
        '2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14', '2011.15', '2011.16', '2011.17',
         '2011.18', '2011.19', '2011.20', '2011.21', '2011.22', '2011.23','2011.24', '2011.25', '2011.26',
         '2011.27', '2011.28', '2011.29', '2011.30']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8',
        '2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14', '2012.15', '2012.16', '2012.17',
         '2012.18', '2012.19', '2012.20', '2012.21', '2012.22', '2012.23','2012.24', '2012.25', '2012.26',
         '2012.27', '2012.28', '2012.29', '2012.30']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8',
        '2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14', '2013.15', '2013.16', '2013.17',
         '2013.18', '2013.19', '2013.20', '2013.21', '2013.22', '2013.23','2013.24', '2013.25', '2013.26',
         '2013.27', '2013.28', '2013.29', '2013.30']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8',
        '2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14', '2014.15', '2014.16', '2014.17',
         '2014.18', '2014.19', '2014.20', '2014.21', '2014.22', '2014.23','2014.24', '2014.25', '2014.26',
         '2014.27', '2014.28', '2014.29', '2014.30']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8',
        '2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14', '2015.15', '2015.16', '2015.17',
         '2015.18', '2015.19', '2015.20', '2015.21', '2015.22', '2015.23','2015.24', '2015.25', '2015.26',
         '2015.27', '2015.28', '2015.29', '2015.30']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]

for i, j in zip(years,datacols):
    df_temp2 = df_accident_naun_city[j]
    year_df = add_year_column(i,np.arange(len(df_temp2)))
    df_temp2.insert(0,'Year',year_df)
    if i == 2009:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]}) 
    elif i == 2010:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})  
    elif i == 2011:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]}) 
    elif i == 2012:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})  
    elif i == 2013:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})  
    elif i == 2014:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]}) 
    elif i == 2015:
        arr = df_temp2.columns
        df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
             arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
             arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})  
    df_city_acci_naun = df_city_acci_naun.append(df_temp2)

df_city_acci_naun.sort_values(by=['Year','State','City'], inplace=True)
df_city_naun = df_city_acci_naun

df_city_acci_naun = df_city_acci_naun.set_index(['Year','State','City'])
In [7]:
#The accidental death statistics have a 3rd category - suicide. This is captured in a separate document and 
#contains 4 sheets in one document.
suicide = ('Statewise-Suicide.xlsx')
# To open Workbook 
wb2 = xlrd.open_workbook(suicide)
sheet_list = wb2.sheet_names()
a=0
sheet_temp = wb2.sheet_by_index(0)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'Year':
        a = i
b=0
sheet_temp = wb2.sheet_by_index(1)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'Name':
        b = i
        
c=0
sheet_temp = wb2.sheet_by_index(2)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'City':
        c = i

d=0
sheet_temp = wb2.sheet_by_index(3)
for i in range(20):
    chk = sheet_temp.cell_value(i,0).replace(" ","")
    if chk == 'City':
        d = i
In [8]:
# Suicide statistics by year
df_suicide_year = pd.read_excel('Statewise-Suicide.xlsx', sheet_by_index=0, skiprows=a)

arr = df_suicide_year.columns
arr2= ['Year', 'Upto14years', 'B1529years', 'B3044years', 'B4559years',
       'Above60years', 'Male', 'Female', 'Transgender', 'Total',
       'BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
       'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
       'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
       'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide',
       'TotalC']
df_suicide_year = df_suicide_year.rename(
    columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
             arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
             arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24]})
In [9]:
# Suicide statistics by state and year with all causes with gender and age statistics included
df_suicide_state = pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[1], skiprows=b)
states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
'NL','OR','PB','RJ','SK','TN','TS','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']

df_comp_suicide = pd.DataFrame()
df_temp2 = pd.DataFrame()


years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['State','2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14', '2009.15', '2009.16', '2009.17',
         '2009.18', '2009.19', '2009.20', '2009.21', '2009.22', '2009.23']
cols1 = ['State','2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14', '2010.15', '2010.16', '2010.17',
         '2010.18', '2010.19', '2010.20', '2010.21', '2010.22', '2010.23']
cols2 = ['State','2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14', '2011.15', '2011.16', '2011.17',
         '2011.18', '2011.19', '2011.20', '2011.21', '2011.22', '2011.23']
cols3 = ['State','2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14', '2012.15', '2012.16', '2012.17',
         '2012.18', '2012.19', '2012.20', '2012.21', '2012.22', '2012.23']
cols4 = ['State','2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14', '2013.15', '2013.16', '2013.17',
         '2013.18', '2013.19', '2013.20', '2013.21', '2013.22', '2013.23']
cols5 = ['State','2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14', '2014.15', '2014.16', '2014.17',
         '2014.18', '2014.19', '2014.20', '2014.21', '2014.22', '2014.23']
cols6 = ['State','2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14', '2015.15', '2015.16', '2015.17',
         '2015.18', '2015.19', '2015.20', '2015.21', '2015.22', '2015.23']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]

for i, j in zip(years,datacols):
    df_temp2 = df_suicide_state[j]
    year_df = add_year_column(i,np.arange(len(df_temp2)))
    df_temp2.insert(0,'Year',year_df)
    if i == 2009:
        df_temp2 = df_temp2.rename(columns={'State':'State','2009.9':'BankruptcyorIndebtness', '2009.10':'Illness', 
        '2009.11':'DeathofDearPerson', '2009.12':'Dowrydispute', '2009.13':'Drugabuse/Addiction', 
        '2009.14':'Failureinexamination','2009.15':'FallinSocialreputation', '2009.16':'Familyproblems', 
        '2009.17':'Loveaffairs','2009.18':'Poverty', '2009.19':'Propertydispute', '2009.20':'Unemployment', 
        '2009.21':'Causesnotknown','2009.22':'OtherSuicide', '2009.23':'TotalC'}) 
    elif i == 2010:
        df_temp2 = df_temp2.rename(columns={'State':'State','2010.9':'BankruptcyorIndebtness', '2010.10':'Illness', 
        '2010.11':'DeathofDearPerson', '2010.12':'Dowrydispute', '2010.13':'Drugabuse/Addiction', 
        '2010.14':'Failureinexamination','2010.15':'FallinSocialreputation', '2010.16':'Familyproblems', 
        '2010.17':'Loveaffairs','2010.18':'Poverty', '2010.19':'Propertydispute', '2010.20':'Unemployment', 
        '2010.21':'Causesnotknown','2010.22':'OtherSuicide', '2010.23':'TotalC'})  
    elif i == 2011:
        df_temp2 = df_temp2.rename(columns={'State':'State','2011.9':'BankruptcyorIndebtness', '2011.10':'Illness', 
        '2011.11':'DeathofDearPerson', '2011.12':'Dowrydispute', '2011.13':'Drugabuse/Addiction', 
        '2011.14':'Failureinexamination','2011.15':'FallinSocialreputation', '2011.16':'Familyproblems', 
        '2011.17':'Loveaffairs','2011.18':'Poverty', '2011.19':'Propertydispute', '2011.20':'Unemployment', 
        '2011.21':'Causesnotknown','2011.22':'OtherSuicide', '2011.23':'TotalC'}) 
    elif i == 2012:
        df_temp2 = df_temp2.rename(columns={'State':'State','2012.9':'BankruptcyorIndebtness', '2012.10':'Illness', 
        '2012.11':'DeathofDearPerson', '2012.12':'Dowrydispute', '2012.13':'Drugabuse/Addiction', 
        '2012.14':'Failureinexamination','2012.15':'FallinSocialreputation', '2012.16':'Familyproblems', 
        '2012.17':'Loveaffairs','2012.18':'Poverty', '2012.19':'Propertydispute', '2012.20':'Unemployment', 
        '2012.21':'Causesnotknown','2012.22':'OtherSuicide', '2012.23':'TotalC'}) 
    elif i == 2013:
        df_temp2 = df_temp2.rename(columns={'State':'State','2013.9':'BankruptcyorIndebtness', '2013.10':'Illness', 
        '2013.11':'DeathofDearPerson', '2013.12':'Dowrydispute', '2013.13':'Drugabuse/Addiction', 
        '2013.14':'Failureinexamination','2013.15':'FallinSocialreputation', '2013.16':'Familyproblems', 
        '2013.17':'Loveaffairs','2013.18':'Poverty', '2013.19':'Propertydispute', '2013.20':'Unemployment', 
        '2013.21':'Causesnotknown','2013.22':'OtherSuicide', '2013.23':'TotalC'}) 
    elif i == 2014:
        df_temp2 = df_temp2.rename(columns={'State':'State','2014.9':'BankruptcyorIndebtness', '2014.10':'Illness', 
        '2014.11':'DeathofDearPerson', '2014.12':'Dowrydispute', '2014.13':'Drugabuse/Addiction', 
        '2014.14':'Failureinexamination','2014.15':'FallinSocialreputation', '2014.16':'Familyproblems', 
        '2014.17':'Loveaffairs','2014.18':'Poverty', '2014.19':'Propertydispute', '2014.20':'Unemployment', 
        '2014.21':'Causesnotknown','2014.22':'OtherSuicide', '2014.23':'TotalC'}) 
    elif i == 2015:
        df_temp2 = df_temp2.rename(columns={'State':'State','2015.9':'BankruptcyorIndebtness', '2015.10':'Illness', 
        '2015.11':'DeathofDearPerson', '2015.12':'Dowrydispute', '2015.13':'Drugabuse/Addiction', 
        '2015.14':'Failureinexamination','2015.15':'FallinSocialreputation', '2015.16':'Familyproblems', 
        '2015.17':'Loveaffairs','2015.18':'Poverty', '2015.19':'Propertydispute', '2015.20':'Unemployment', 
        '2015.21':'Causesnotknown','2015.22':'OtherSuicide', '2015.23':'TotalC'}) 
    df_comp_suicide = df_comp_suicide.append(df_temp2)

df_comp_suicide.sort_values(by=['Year','State'], inplace=True)
df_state_sui = df_comp_suicide

df_comp_suicide = df_comp_suicide.set_index(['Year','State'])
In [10]:
# Suicide statistics by age and gender by State and year is captured in this dataframe

df_state_suicide_age_gender = pd.DataFrame()
df_temp3 = pd.DataFrame()

arr2= ['Year', 'State','Upto14years', 'B1529years', 'B3044years', 'B4559years',
       'Above60years', 'Male', 'Female', 'Transgender', 'Total']

years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8']
cols1 = ['State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8']
cols2 = ['State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8']
cols3 = ['State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8']
cols4 = ['State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8']
cols5 = ['State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8']
cols6 = ['State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8']

datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]

for i, j in zip(years,datacols):
    df_temp3 = df_suicide_state[j]
    year_df = add_year_column(i,np.arange(len(df_temp3)))
    df_temp3.insert(0,'Year',year_df)
    if i == 2009:
        arr = df_temp3.columns       
        df_temp3 = df_temp3.rename(columns=
             {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2010:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2011:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2012:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2013:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2014:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
             {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    elif i == 2015:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
    df_state_suicide_age_gender = df_state_suicide_age_gender.append(df_temp3)

df_state_suicide_age_gender.sort_values(by=['Year','State'], inplace=True)
df_state_sag = df_state_suicide_age_gender

df_state_suicide_age_gender = df_state_suicide_age_gender.set_index(['Year','State'])
In [11]:
# Suicide statistics by age and gender by city and year is captured in this dataframe
df_suicide_cities_age = pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[2], skiprows=c)

df_city_suicide_age_gender = pd.DataFrame()
df_temp3 = pd.DataFrame()

arr2= ['Year', 'City', 'State','Upto14years', 'B1529years', 'B3044years', 'B4559years',
       'Above60years', 'Male', 'Female', 'Transgender', 'Total']

years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8']

datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]

for i, j in zip(years,datacols):
    df_temp3 = df_suicide_cities_age[j]
    year_df = add_year_column(i,np.arange(len(df_temp3)))
    df_temp3.insert(0,'Year',year_df)
    if i == 2009:
        arr = df_temp3.columns       
        df_temp3 = df_temp3.rename(columns=
             {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2010:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2011:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2012:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2013:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2014:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
             {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    elif i == 2015:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(columns=
            {arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11]})
    df_city_suicide_age_gender = df_city_suicide_age_gender.append(df_temp3)

df_city_suicide_age_gender.sort_values(by=['Year','State','City'], inplace=True)
df_city_sag = df_city_suicide_age_gender

df_city_suicide_age_gender = df_city_suicide_age_gender.set_index(['Year','State','City'])
In [12]:
# Suicide statistics by city for causes only by year
df_suicide_cities_causes =  pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[3], skiprows=d)
#City Wise Suicide Causes for all the years
df_city_suicide_causes = pd.DataFrame()
df_temp3 = pd.DataFrame()

arr2= ['Year', 'City', 'State',
       'BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
       'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
       'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
       'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide',
       'TotalC']

years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8',
         '2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8',
         '2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8',
         '2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8',
         '2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8',
         '2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8',
         '2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8',
         '2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]

for i, j in zip(years,datacols):
    df_temp3 = df_suicide_cities_causes[j]
    year_df = add_year_column(i,np.arange(len(df_temp3)))
    df_temp3.insert(0,'Year',year_df)
    if i == 2009:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})     
    elif i == 2010:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    elif i == 2011:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    elif i == 2012:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    elif i == 2013:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    elif i == 2014:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    elif i == 2015:
        arr = df_temp3.columns
        df_temp3 = df_temp3.rename(
        columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
             arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
             arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15], 
             arr[16]: arr2[16],  arr[17]: arr2[17]})
    df_city_suicide_causes = df_city_suicide_causes.append(df_temp3)

df_city_suicide_causes.sort_values(by=['Year','State','City'], inplace=True)
df_city_sui = df_city_suicide_causes

df_city_suicide_causes = df_city_suicide_causes.set_index(['Year','State','City'])
#print(df_city_suicide_causes.head(5))
In [13]:
#dataframes from natural and unnatural causes are mergerd with the suicide cases dataframe on the states statistics
df_comp_states = df_complete.loc[df_complete['Year'] > 2008]
df_comp_states=df_comp_states.set_index(['Year','State'])
df_comp_states.sort_values(by=['Year','State'], inplace=True)

df_all_states_causes = pd.merge(df_comp_states, 
                df_comp_suicide[['BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
       'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
       'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
       'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide','TotalC']],
        on=['Year','State'], how='inner')
In [14]:
#join regular causes dataframe with suicide dataframe 
df_allcauses_year = pd.merge(df_accident_naun, df_suicide_year, left_on='Year', right_on='Year')
all_causes_year = pd.pivot_table(df_allcauses_year, index='Year', values=['NTotal', 'UnTotal', 'TotalC'])
In [15]:
def plot_stacked_bar_overall(df_natural, statename):
    df_exp2 = df_natural.loc[df_natural['State']==statename]
  
    fig = go.Figure(data=[
        go.Bar(name='NaturalCauses', y= df_exp2['Year'], x=df_exp2['NTotal'],
               marker_color='palevioletred',orientation='h'),
        go.Bar(name='UnNaturalCauses', y= df_exp2['Year'], x=df_exp2['UnTotal'],
               marker_color='dimgrey',orientation='h')
        ])  

    fig.update_layout(title='Natural and Unnatural Accidents',
                  xaxis_tickfont_size=14,   
                  yaxis=dict(title='Overall Count of Accidents',
                             titlefont_size=16,
                             tickfont_size=14),
                  legend=dict(xanchor='right', yanchor='top', bgcolor='rgba(255, 255, 255, 0)',
                                          bordercolor='rgba(255, 255, 255, 0)'),
                  barmode='stack')
    fig.show()
plot_stacked_bar_overall(df_complete,'WB')
In [16]:
#The choropleth map is used to evidently show the statistics differentiation for the years before 2014 and after 2014,
#when the state TS originated in Indian States set. Only Natural and Unnatural causes are plotted. When combined with Suicide,
#no major variation of data could be identified from the graph
def plot_choropleth_map(df_natural,year):
    pd.options.mode.chained_assignment = None
    with open("india_states-2013.json") as jsonfile1:
        states_2013 = json.load(jsonfile1)
    with open("india_states-2014-2015.json") as jsonfile2:
        states_2014_2015 = json.load(jsonfile2)

    if year < 2014:
        df_data =  df_natural.loc[(df_natural['Year']==year) & (df_natural['State'] != 'TS')]
        states = states_2013
    else:
        df_data =  df_natural.loc[df_natural['Year']==year]
        states = states_2014_2015
    df_data['GrandTotal']=df_data.NTotal + df_data.UnTotal
    arr = df_data[['State', 'GrandTotal']]
    fig1 = px.choropleth(arr, geojson=states, locations='State', color='GrandTotal',
                           color_continuous_scale="Viridis",
                           range_color=(0, 30000),
                           scope="asia",
                           labels={'GrandTotal':'Total'}
                          )
    fig1.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, title= 'Accident Statistics for the Country')
    fig1.show()
plot_choropleth_map(df_complete,2008)
In [17]:
plot_choropleth_map(df_complete,2015)
In [18]:
def state_statistics(**kwargs):  
    year = 0
    states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
    'NL','OR','PB','RJ','SK','TN','TS','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']

    for key, value in kwargs.items(): 
        if key=='year':
            year = value
        elif key == 'state':
            state = value.upper()
  
    character1=["State","Avalanche", "Lightning", "HeatStroke", "Flood", "ColdWeather",
       "Cyclone", "Starvation", "Earthquake", "Epidemic", "TorrentialRains",
       "Other", "Natural", "AirCrash", "StructureCollapse", "Drowning", "Electrocution",
       "Explosion", "Falls", "FMAccidents", "Fire", "Firearms", "SuddenDeath",
       "KilledbyAnimal", "MQDisaster", "Poisioning", "Stampede", "Suffocation",
       "Trafficaccident", "OtherCauses", "Unknowncauses","UnNatural"]
    parent1=["","Natural","Natural","Natural","Natural","Natural",
                "Natural","Natural","Natural","Natural","Natural",
                "Natural",state,"UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural",state]
    character2=["Avalanche", "Lightning", "HeatStroke", "Flood", "ColdWeather",
               "Cyclone", "Starvation", "Earthquake", "Epidemic", "TorrentialRains",
               "Other", "Natural", "AirCrash", "StructureCollapse", "Drowning", "Electrocution",
               "Explosion", "Falls", "FMAccidents", "Fire", "Firearms", "SuddenDeath",
               "KilledbyAnimal", "MQDisaster", "Poisioning", "Stampede", "Suffocation",
               "Trafficaccident", "OtherCauses", "Unknowncauses","UnNatural","BankruptcyorIndebtness", "Illness", "DeathofDearPerson",
               "Dowrydispute", "Drugabuse/Addiction", "Failureinexamination",
               "FallinSocialreputation", "Familyproblems", "Loveaffairs", "Poverty",
               "Propertydispute", "Unemployment", "Causesnotknown", "OtherSuicide","Suicide","State"]
    parent2=["Natural","Natural","Natural","Natural","Natural",
                "Natural","Natural","Natural","Natural","Natural",
                "Natural",state,"UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural","UnNatural","UnNatural",
                "UnNatural","UnNatural","UnNatural",state,"Suicide","Suicide","Suicide",
               "Suicide","Suicide","Suicide","Suicide","Suicide","Suicide","Suicide",
               "Suicide","Suicide","Suicide","Suicide",state,""]
        
       
          
    
# Statistics for a given year is plotted    
    if year != 0:
        if year > 2007 & year < 2016:
            if state not in states_list:
                print('Allowed values for league are AP,AR,AS,BH,CG,GA,GJ,HR,HP,JK,JH,KA,KL,MP,MH,MN,ML,MZ,NL,OR,PB,RJ,SK,TN,TS,TR,UP,UK,WB,AN,CH,DH,DD,DL,LD,PY')
            else:
                pd.options.mode.chained_assignment = None
                if year == 2008:
                    df_data = df_complete.loc[(df_complete['Year']==year) & (df_complete['State']==state)]
                    df_data_2 = df_data.drop(columns='Year')
                    a1 = np.array(df_data_2.values)
                    a2 = []
                    a2 = a1.flatten()
                    dataplot = dict(character=character1, parent=parent1,values=a2)
                else:
                    df_data = df_all_states_causes.loc[year,state]
                    df_data.reset_index(drop=True,inplace=True)
                    df_data.loc[0,'State'] = state
                    a1 = np.array(df_data.values)
                    a2 = []
                    a2 = a1.flatten()
                    dataplot = dict(character=character2, parent=parent2,values=a2)    
                fig = px.sunburst(dataplot,
                      names='character',
                      parents='parent',
                      values='values',
                      )
                fig.show()
        else:
            print('Year has to be in the range of 2008 through 2015')
    else:
        print('Year has to be numeric and greater than 2007')
        
                    
state_statistics(year=2008, state='WB')
In [19]:
state_statistics(year=2015, state='TN')
In [20]:
#state_statistics(year=2008, state='AP')
In [21]:
#This function is used to plot the suicide statistics over the years based on gender and different age sequences.
#The plot indicates that the maximum suicide is committed by men and 
#the maximum suicide is committed by people of age group 15-29 years
def plot_scatter_suicide_by_year_overall():
    suicide_year_age_gender = pd.pivot_table(df_suicide_year, values=['Upto14years', 'B1529years', 'B3044years', 'B4559years',
       'Above60years','Male','Female'], index='Year')
    trace0 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.Upto14years,
        mode='markers',name='Upto14years')
    trace1 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.B1529years,
        mode='markers',name='B15-29years')
    trace2 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.B3044years,
        mode='markers',name='B30-44years')
    trace3 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.B4559years,
        mode='markers',name='B45-59years')
    trace4 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.Above60years,
        mode='markers',name='Above60years')
    trace5 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.Male,
        mode='markers',name='Male')
    trace6 = go.Scatter(
        x= suicide_year_age_gender.index,
        y =suicide_year_age_gender.Female,
        mode='markers',name='Female')

    data = [trace0,trace1,trace2,trace3,trace4,trace5,trace6]
    layout = go.Layout(title='Suicide by Age and Gender')
    fig = go.Figure(data,layout)
    fig.update_traces(mode='markers', marker_line_width=2, marker_size=8)
    fig.show()
plot_scatter_suicide_by_year_overall()
In [22]:
#The heat map accepts year as the input and generates the heat map for all the states using the top5 causes from the
#combination of natural and unnatural causes.
#The state TS originated only in the year 2014 and hence the state is only plotted if the year is 2014 or above.
def plot_heatmap_overall(year):
    if year < 2014:
        df_data_year = df_complete.loc[(df_complete['Year']==year) & (df_complete['State'] != 'TS')]
    else:
        df_data_year = df_complete.loc[df_complete['Year']==year]
    state = df_data_year['State']
    df_data = df_data_year.drop(columns=['State','UnTotal','Year','NTotal'])
    sumcolumns = df_data.sum()
    sumcolumns.sort_values(ascending=False,inplace=True)
    cols = sumcolumns.index[0:5]
    arr = cols.values.flatten()
    df_data_map = df_data_year[arr]
    datamap = df_data_map.values
    fig = go.Figure(data=go.Heatmap(
                   z=datamap,
                   x=arr,
                   y=state,
                   colorscale="armyrose"))
    fig.update_layout(title='Top 5 causes of accidents for the Year {}'.format(year),
    autosize=False,
    width=1000,
    height=1000)
    fig.update_xaxes(side="top")
    fig.show()

plot_heatmap_overall(2011)
In [23]:
#The heat map accepts year as the input and generates the heat map for the top10 states using the top5 causes from the
#combination of natural and unnatural causes.
#The state TS originated only in the year 2014 and hence the state is only plotted if the year is 2014 or above.
def plot_heatmap_top10(year):
    if year < 2014:
        df_data_year = df_complete.loc[(df_complete['Year']==year) & (df_complete['State'] != 'TS')]
    else:
        df_data_year = df_complete.loc[df_complete['Year']==year]
        
    df_data = df_data_year.drop(columns=['State','UnTotal','Year','NTotal'])
    sumcolumns = df_data.sum()
    sumcolumns.sort_values(ascending=False,inplace=True)
    cols = sumcolumns.index[0:5]
    arr = cols.values.flatten()
    arr1 = np.append(arr,['State'])
    df_data_map = df_data_year[arr1]
    df_data_map["sum"] = df_data_map.sum(axis=1)
    df_data_map = df_data_map.sort_values(by ='sum', ascending=False )
    df_data_map_top10 = df_data_map[:10]
    state = df_data_map_top10['State']
    df_data_map_top10 = df_data_map_top10.drop(columns=['sum','State'])
    datamap = df_data_map_top10.values
    fig = go.Figure(data=go.Heatmap(
                   z=datamap,
                   x=arr,
                   y=state,
                   colorscale="armyrose"))
    fig.update_layout(title='Top 5 causes of accidents for top 10 states in Year {}'.format(year),
    autosize=False,
    width=400,
    height=600)
    fig.show() 
plot_heatmap_top10(2013)
In [24]:
#Pandas Dataframe merge is used to combine all the entire causes for city for all years, natural, unnatural and suicide
#causes using the multi-index Year, State and City
df_all_cities_causes = pd.merge(df_city_acci_naun, 
                df_city_suicide_causes[['BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
       'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
       'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
       'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide','TotalC']],
        on=['Year','State','City'], how='inner')
In [25]:
#The heat map accepts year as the input and generates the heat map for all the states using the top10 causes from the
#combination of natural and unnatural causes.
#The state TS originated only in the year 2014 and hence the state is only plotted if the year is 2014 or above.
def plot_heatmap_overall_city(year):
    df_data_year = df_all_cities_causes.loc[year]
    df_data_year.dropna(how='any')
    df_data = df_data_year.drop(columns=['NTotal','UnTotal','TotalC'])
    sumcolumns = df_data.sum()
    sumcolumns.sort_values(ascending=False,inplace=True)
    col_2d = sumcolumns.index[0:7]
    col_array = col_2d.values.flatten()
    data_piv = pd.pivot_table(df_data, index=['City'], values=col_array)
    d2= data_piv.reset_index()
    d2["sum"] = d2.sum(axis=1)
    d2 = d2.sort_values(by ='sum', ascending=False )
    df_data_map_top10 = d2[:10]
    city = df_data_map_top10['City']
    df_data_map_top10 = df_data_map_top10.drop(columns=['sum','City'])
    datamap1 = df_data_map_top10.values
    fig = go.Figure(data=go.Heatmap(
                   z=datamap1,
                   x=df_data_map_top10.columns,
                   y=city,
                   colorscale="armyrose"))
    fig.update_layout(title='Top 5 causes of accidents for top 10 Cities',
    autosize=False,
    width=400,
    height=600)
    fig.show() 
plot_heatmap_overall_city(2012)
In [26]:
# This function can be used to compare maximum of 2 causes for 2 states over the course of years 2009 through 2015
def plot_compares_states_causes(**kwargs):
    state1 = 'NA'
    state2 = 'NA'
    case1 = ""
    case2 = ""
    states = [state1, state2]
    xdata = [2009, 2010, 2011, 2012, 2013, 2014, 2015]
    cause1 = 0
    cause2 = 0
    states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
    'NL','OR','PB','RJ','SK','TN','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']
    
    causes_dict = {1:"Avalanche", 2:"Lightning",3: "HeatStroke",4: "Flood", 5:"ColdWeather",
               6:"Cyclone", 7:"Starvation", 8:"Earthquake", 9:"Epidemic", 10:"TorrentialRains",
               11:"Other", 12:"AirCrash", 13:"StructureCollapse", 14:"Drowning", 15:"Electrocution",
               16:"Explosion", 17:"Falls", 18:"FMAccidents", 19:"Fire", 20:"Firearms", 21:"SuddenDeath",
               22:"KilledbyAnimal", 23:"MQDisaster", 24:"Poisioning", 25:"Stampede", 26:"Suffocation",
               27:"Trafficaccident", 28:"OtherCauses", 29:"Unknowncauses",30:'BankruptcyorIndebtness', 
               31:"Illness", 32:"DeathofDearPerson",33:"Dowrydispute", 34:"Drugabuse/Addiction", 35:"Failureinexamination",
               36:"FallinSocialreputation", 37:"Familyproblems", 38:"Loveaffairs", 39:"Poverty",
               40:"Propertydispute", 41:"Unemployment", 42:"Causesnotknown", 43:"OtherSuicide"}        
    
    for key, value in kwargs.items(): 
        if key=='state1':
            state1 =  value.upper()
        elif key == 'state2':
            state2 = value.upper()
        elif key == 'cause1':
            cause1 = value
        elif key == 'cause2':
            cause2 = value
    
# Statistics for requested states and causes are plotted for the year in range 2009 through 2015 
    if (cause1 >0 & cause1 < 47):
        case1 = causes_dict[cause1]
    
    if (cause2 >0 & cause2 < 47):
        case2 = causes_dict[cause2]
    
    if (case1 != "") & (case2 != ""):    
        if ((state1 not in states_list) and (state2 not in states_list)):
            print('Allowed values for state are AP,AR,AS,BH,CG,GA,GJ,HR,HP,JK,JH,KA,KL,MP,MH,MN,ML,MZ,NL,OR,PB,RJ,SK,TN,TR,UP,UK,WB,AN,CH,DH,DD,DL,LD,PY')
        else:
            causes = [case1, case2]
            flg = []
            ydata_state1 =[]
            ydata_state2 =[]
        
            for i in causes:
                if i in df_state_naun.columns:
                    flg.append('A')
                elif i in df_state_sui.columns:
                    flg.append('B')
        
            for i,j in zip(causes,flg):
                if j == 'A':
                    df_temp = df_state_naun.loc[df_state_naun['State'] == state1, i ]
                    y1 = df_temp.values.flatten()
                    ydata_state1.append(y1)
                if j == 'B':
                    df_temp = df_state_sui.loc[df_state_sui['State'] == state1, i ]
                    y2 = df_temp.values.flatten()
                    ydata_state1.append(y2)
            
            for i,j in zip(causes,flg):
                if j == 'A':
                    df_temp = df_state_naun.loc[df_state_naun['State'] == state2, i ]
                    y1 = df_temp.values.flatten()
                    ydata_state2.append(y1)
                if j == 'B':
                    df_temp = df_state_sui.loc[df_state_sui['State'] == state2, i ]
                    y2 = df_temp.values.flatten()
                    ydata_state2.append(y2)
 
            #plot_graph
            fig,(ax3,ax1)= plt.subplots(nrows=2, ncols=1, sharex='row',figsize=(16,8))
            legendStr = [case1, case2]
            x = xdata
            ax3.set_ylabel(case1)
            ax1.set_ylabel(case2)
            ax1.set_xlabel('Years')
    
            ax1.plot(x,ydata_state1[0], color='palegreen', marker='o',label=state1)
            ax1.plot(x,ydata_state2[0], color='lightskyblue', marker='o',label=state2)
            ax1.legend(loc='upper center')

            ax3.plot(x,ydata_state1[1], color='palegreen', marker='o',label=state1)
            ax3.plot(x,ydata_state2[1], color='lightskyblue', marker='o',label=state2)
            ax3.legend(loc='upper center')

            plt.setp(ax3.get_xticklabels(), visible=False)
            ax3.tick_params(axis='x', length=0)
            plt.show()
    else:
        print("Requires minimum 2 causes to be analysed for this function")

            
plot_compares_states_causes(year=2007, state1='TN',state2='KL',cause1=20,cause2=35)
    
In [27]:
#for a given year the states with and cities with the highest number of accidents
def plot_states(year):
    # #join regular causes dataframe with suicide dataframe to do the line plots for years in range
    df_data_state = df_state_naun.loc[df_state_naun['Year']==year, ['State', 'NTotal','UnTotal']]
    df_data_state['GrandTotal'] = df_data_state['NTotal'] + df_data_state['UnTotal']
    df_data_state.sort_values(by='GrandTotal', ascending=False, inplace=True)
    x = df_data_state['State'].head(10).values.flatten()
    y = df_data_state['GrandTotal'].head(10).values.flatten()
    plt.bar(x, y)
    plt.ylabel("Number of Accidents")
    plt.xlabel("Major States")
    plt.title("Highest Accident Prone States For The Year {}".format(year))
    plt.show()
    
def plot_cities(year):
    # #join regular causes dataframe with suicide dataframe to do the line plots for years in range
    df_data_city = df_city_naun.loc[df_city_naun['Year']==year, ['City', 'NTotal','UnTotal']]
    df_data_city['GrandTotal'] = df_data_city['NTotal'] + df_data_city['UnTotal']
    df_data_city.sort_values(by='GrandTotal', ascending=False, inplace=True)
    x = df_data_city['City'].head(10).values.flatten()
    y = df_data_city['GrandTotal'].head(10).values.flatten()
    plt.bar(x, y, color='palegreen')
    plt.xticks(rotation=70)
    plt.ylabel("Number of Accidents")
    plt.xlabel("Major Cities")
    plt.title("Highest Accident Prone Cities For The Year {}".format(year))
    plt.show()

def plot_states_cities(year):
    plot_states(year)
    plot_cities(year)
    
plot_states_cities(2012)
In [ ]: